import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
# Load the BRCA patient dataset from its local CSV export and preview the
# first rows.
DATA_PATH = 'C:/Users/Rakesh/Datasets/BRCA.csv'
data = pd.read_csv(DATA_PATH)
data.head()
|  | Patient_ID | Age | Gender | Protein1 | Protein2 | Protein3 | Protein4 | Tumour_Stage | Histology | ER status | PR status | HER2 status | Surgery_type | Date_of_Surgery | Date_of_Last_Visit | Patient_Status |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | TCGA-D8-A1XD | 36.0 | FEMALE | 0.080353 | 0.42638 | 0.54715 | 0.273680 | III | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Modified Radical Mastectomy | 15-Jan-17 | 19-Jun-17 | Alive |
| 1 | TCGA-EW-A1OX | 43.0 | FEMALE | -0.420320 | 0.57807 | 0.61447 | -0.031505 | II | Mucinous Carcinoma | Positive | Positive | Negative | Lumpectomy | 26-Apr-17 | 09-Nov-18 | Dead |
| 2 | TCGA-A8-A079 | 69.0 | FEMALE | 0.213980 | 1.31140 | -0.32747 | -0.234260 | III | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Other | 08-Sep-17 | 09-Jun-18 | Alive |
| 3 | TCGA-D8-A1XR | 56.0 | FEMALE | 0.345090 | -0.21147 | -0.19304 | 0.124270 | II | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Modified Radical Mastectomy | 25-Jan-17 | 12-Jul-17 | Alive |
| 4 | TCGA-BH-A0BF | 56.0 | FEMALE | 0.221550 | 1.90680 | 0.52045 | -0.311990 | II | Infiltrating Ductal Carcinoma | Positive | Positive | Negative | Other | 06-May-17 | 27-Jun-19 | Dead |
# Count the missing values in each column.
data.isna().sum()
Patient_ID 7 Age 7 Gender 7 Protein1 7 Protein2 7 Protein3 7 Protein4 7 Tumour_Stage 7 Histology 7 ER status 7 PR status 7 HER2 status 7 Surgery_type 7 Date_of_Surgery 7 Date_of_Last_Visit 24 Patient_Status 20 dtype: int64
# Discard every row that has at least one missing value, then print the
# per-column non-null counts and dtypes of what is left.
data = data.dropna(axis=0, how='any')
data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 317 entries, 0 to 333 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Patient_ID 317 non-null object 1 Age 317 non-null float64 2 Gender 317 non-null object 3 Protein1 317 non-null float64 4 Protein2 317 non-null float64 5 Protein3 317 non-null float64 6 Protein4 317 non-null float64 7 Tumour_Stage 317 non-null object 8 Histology 317 non-null object 9 ER status 317 non-null object 10 PR status 317 non-null object 11 HER2 status 317 non-null object 12 Surgery_type 317 non-null object 13 Date_of_Surgery 317 non-null object 14 Date_of_Last_Visit 317 non-null object 15 Patient_Status 317 non-null object dtypes: float64(5), object(11) memory usage: 42.1+ KB
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# numeric columns.
summary_statistics = data.describe()
summary_statistics
|  | Age | Protein1 | Protein2 | Protein3 | Protein4 |
|---|---|---|---|---|---|
| count | 317.000000 | 317.000000 | 317.000000 | 317.000000 | 317.000000 |
| mean | 58.725552 | -0.027232 | 0.949557 | -0.095104 | 0.006713 |
| std | 12.827374 | 0.543858 | 0.906153 | 0.589027 | 0.625965 |
| min | 29.000000 | -2.144600 | -0.978730 | -1.627400 | -2.025500 |
| 25% | 49.000000 | -0.350600 | 0.368840 | -0.531360 | -0.382240 |
| 50% | 58.000000 | 0.005649 | 0.997130 | -0.193040 | 0.038522 |
| 75% | 67.000000 | 0.336260 | 1.612000 | 0.251210 | 0.436250 |
| max | 90.000000 | 1.593600 | 3.402200 | 2.193400 | 1.629900 |
# Donut chart of how many patients fall into each tumour stage.
stage = data['Tumour_Stage'].value_counts()
stage_labels = stage.index
stage_counts = stage.values
figure = px.pie(
    data,
    names=stage_labels,
    values=stage_counts,
    hole=0.5,
    title='Tumour Stages of Patients',
)
figure.show()
# Histology
# Donut chart of how many patients have each histology type.
histology = data["Histology"].value_counts()
histology_labels = histology.index
histology_counts = histology.values
figure = px.pie(
    data,
    names=histology_labels,
    values=histology_counts,
    hole=0.5,
    title="Histology of Patients",
)
figure.show()
# Frequency of each distinct value in the 'ER status' column.
er_status_counts = data['ER status'].value_counts()
er_status_counts
Positive 317 Name: ER status, dtype: int64
# Frequency of each distinct value in the 'PR status' column.
pr_status_counts = data['PR status'].value_counts()
pr_status_counts
Positive 317 Name: PR status, dtype: int64
# Frequency of each distinct value in the 'HER2 status' column.
her2_status_counts = data['HER2 status'].value_counts()
her2_status_counts
Negative 288 Positive 29 Name: HER2 status, dtype: int64
# Donut chart of how many patients underwent each type of surgery.
surgery = data["Surgery_type"].value_counts()
surgery_labels = surgery.index
surgery_counts = surgery.values
figure = px.pie(
    data,
    names=surgery_labels,
    values=surgery_counts,
    hole=0.5,
    title="Types of Surgery for Patients",
)
figure.show()
# Replace the text labels of each categorical column with integer codes so
# the columns can be fed to the model as numbers.  Each column is mapped
# through its own lookup table; Series.map leaves NaN for any label that is
# not listed in the table.
category_codes = {
    'Tumour_Stage': {'I': 1, 'II': 2, 'III': 3},
    'Histology': {
        'Infiltrating Ductal Carcinoma': 1,
        'Infiltrating Lobular Carcinoma': 2,
        "Mucinous Carcinoma": 3,
    },
    'ER status': {'Positive': 1},
    'PR status': {'Positive': 1},
    'HER2 status': {'Positive': 1, 'Negative': 2},
    'Gender': {'MALE': 0, 'FEMALE': 1},
    'Surgery_type': {
        'Other': 1,
        'Modified Radical Mastectomy': 2,
        'Lumpectomy': 3,
        'Simple Mastectomy': 4,
    },
}
for column_name, codes in category_codes.items():
    data[column_name] = data[column_name].map(codes)
data.head()
|  | Patient_ID | Age | Gender | Protein1 | Protein2 | Protein3 | Protein4 | Tumour_Stage | Histology | ER status | PR status | HER2 status | Surgery_type | Date_of_Surgery | Date_of_Last_Visit | Patient_Status |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | TCGA-D8-A1XD | 36.0 | 1 | 0.080353 | 0.42638 | 0.54715 | 0.273680 | 3 | 1 | 1 | 1 | 2 | 2 | 15-Jan-17 | 19-Jun-17 | Alive |
| 1 | TCGA-EW-A1OX | 43.0 | 1 | -0.420320 | 0.57807 | 0.61447 | -0.031505 | 2 | 3 | 1 | 1 | 2 | 3 | 26-Apr-17 | 09-Nov-18 | Dead |
| 2 | TCGA-A8-A079 | 69.0 | 1 | 0.213980 | 1.31140 | -0.32747 | -0.234260 | 3 | 1 | 1 | 1 | 2 | 1 | 08-Sep-17 | 09-Jun-18 | Alive |
| 3 | TCGA-D8-A1XR | 56.0 | 1 | 0.345090 | -0.21147 | -0.19304 | 0.124270 | 2 | 1 | 1 | 1 | 2 | 2 | 25-Jan-17 | 12-Jul-17 | Alive |
| 4 | TCGA-BH-A0BF | 56.0 | 1 | 0.221550 | 1.90680 | 0.52045 | -0.311990 | 2 | 1 | 1 | 1 | 2 | 1 | 06-May-17 | 27-Jun-19 | Dead |
# Feature matrix: the numeric and integer-encoded columns, in the order the
# model will expect them again at prediction time.
feature_columns = [
    'Age', 'Gender', 'Protein1', 'Protein2', 'Protein3', 'Protein4',
    'Tumour_Stage', 'Histology', 'ER status', 'PR status',
    'HER2 status', 'Surgery_type',
]
x = data[feature_columns].to_numpy()

# Target as a 1-D array of shape (n_samples,).  Selecting with a list of
# columns (data[['Patient_Status']]) produces an (n_samples, 1) column
# vector instead, which makes SVC.fit emit a DataConversionWarning.
y = data['Patient_Status'].to_numpy()

# Hold out 10% of the rows; the fixed random_state keeps the split
# reproducible between runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.1, random_state=42
)

# NOTE(review): xtest / ytest are not used anywhere in the visible code, so
# the fitted model is never scored on the held-out rows.
model = SVC()
model.fit(xtrain, ytrain)
C:\Users\Rakesh\Downloads\Anaconda\lib\site-packages\sklearn\utils\validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
SVC()
# Predict the status for a single patient.  Values are given in the same
# column order the model was trained on: Age, Gender, Protein1, Protein2,
# Protein3, Protein4, Tumour_Stage, Histology, ER status, PR status,
# HER2 status, Surgery_type.
sample_row = [36.0, 1, 0.080353, 0.42638, 0.54715, 0.273680, 3, 1, 1, 1, 2, 2]
features = np.array([sample_row])  # 2-D: one row per sample
prediction = model.predict(features)
print(prediction)
['Alive']